// comb: three cascaded first-difference stages, y[n] = x[n] - x[n-1],
// followed by an output register.
//
// Parameters:
//   NIN   - width of the input sample bus `din`
//   NOUT  - width of the internal data path and of `dout`
//
// Ports:
//   clk   - clock; every register updates on its rising edge
//   rstn  - asynchronous reset, active low; clears every register to 0
//   en    - clock enable; all pipeline registers advance only when en is 1
//   din   - input sample, sampled when en is 1
//   valid - 1 for the cycle following a cycle in which en was 1, else 0
//   dout  - registered output of the third difference stage
//
// All arithmetic is NOUT bits wide and wraps modulo 2**NOUT.
module comb
    #(parameter NIN  = 21,
      parameter NOUT = 17)
    (
     input               clk,
     input               rstn,
     input               en,
     input [NIN-1:0]     din,
     // Driven by this module (see the assign at the bottom), so it has to be
     // an output; it was previously declared as an input.
     output              valid,
     output [NOUT-1:0]   dout);


   // Shift-register history of `en`. It only shifts while en is 1, and it is
   // referenced solely by the disabled alternative implementation below.
   reg [5:0]                 en_r ;
   always @(posedge clk or negedge rstn) begin
      if (!rstn) begin
         en_r <= 'b0 ;
      end
      else if (en) begin
         // Take 5 old bits + 1 new bit so the concatenation is exactly 6 bits
         // wide (the former en_r[5:0] slice produced 7 bits and relied on
         // silent truncation of the MSB).
         en_r <= {en_r[4:0], en} ;
      end
   end

   // Disabled array-based alternative, kept for reference only.
   // NOTE(review): `en_r[i]` below reads the loop variable outside of any
   // loop; that needs fixing before this variant could be re-enabled.
   /*
   reg [NOUT-1:0]       dx [5:0] ;
   wire [NOUT-1:0]      sx [5:0] ;
   assign sx[0] =       din ;           //data shift as FIR filter
   assign sx[1] =       din - dx[0] ;   //first stage of FIR
   assign sx[2] =       dx[1] ;
   assign sx[3] =       dx[1] - dx[2] ; //2nd stage of FIR
   assign sx[4] =       dx[3] ;
   assign sx[5] =       dx[3] - dx[4] ; //3rd stage of FIR
   integer                   i ;
   always @(posedge clk or negedge rstn) begin
      if (!rstn) begin
         for(i=0; i<=5; i=i+1) begin
            dx[i]     <= 'b0 ;
         end
      end
      else if (en_r[i]) begin
         for(i=0; i<=5; i=i+1) begin
            dx[i]     <= sx[i] ;
         end
      end
   end
    */

   // Per stage: dN holds the current sample, dN_d the previous one.
   reg [NOUT-1:0]            d1, d1_d, d2, d2_d, d3, d3_d ;

   // Stage 1: difference of two consecutive inputs (subtract only, no
   // multiplier needed).
   always @(posedge clk or negedge rstn) begin
      if (!rstn) begin
         d1     <= 'b0 ;
         d1_d   <= 'b0 ;
      end
      else if (en) begin
         // din is NIN bits wide and unsigned: this assignment keeps its low
         // NOUT bits when NIN > NOUT and zero-extends when NIN < NOUT.
         d1     <= din ;
         d1_d   <= d1 ;
      end
   end
   wire [NOUT-1:0]      s1_out = d1 - d1_d ;

   // Stage 2: same structure, fed by the stage-1 difference.
   always @(posedge clk or negedge rstn) begin
      if (!rstn) begin
         d2     <= 'b0 ;
         d2_d   <= 'b0 ;
      end
      else if (en) begin
         d2     <= s1_out ;
         d2_d   <= d2 ;
      end
   end
   wire [NOUT-1:0]      s2_out = d2 - d2_d ;

   // Stage 3: same structure, fed by the stage-2 difference.
   always @(posedge clk or negedge rstn) begin
      if (!rstn) begin
         d3     <= 'b0 ;
         d3_d   <= 'b0 ;
      end
      else if (en) begin
         d3     <= s2_out ;
         d3_d   <= d3 ;
      end
   end
   wire [NOUT-1:0]      s3_out = d3 - d3_d ;

   // Output register. dout_r captures the stage-3 difference whenever en is 1
   // and holds its value otherwise; valid_r mirrors en delayed by one clock.
   reg [NOUT-1:0]       dout_r ;
   reg                  valid_r ;
   always @(posedge clk or negedge rstn) begin
      if (!rstn) begin
         dout_r         <= 'b0 ;
         valid_r        <= 'b0 ;
      end
      else if (en) begin
         dout_r         <= s3_out ;
         valid_r        <= 1'b1 ;
      end
      else begin
         valid_r        <= 1'b0 ;
      end
   end
   assign       dout    = dout_r ;
   assign       valid   = valid_r ;

endmodule
